home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
DOS Vuser Deluxe 2003 October
/
DOS Vuser Deluxe 2003 Oct - Disc 1.iso
/
FREE
/
INTERNET
/
iria107a.lzh
/
script
/
sre_parse.pyc
(
.txt
)
< prev
next >
Wrap
Python Compiled Bytecode
|
2000-11-17
|
21KB
|
728 lines
# Source Generated with Decompyle++
# File: in.pyc (Python 2.0)
import string
import sys
from sre_constants import *
# Characters that the parser does not treat as plain literal characters.
SPECIAL_CHARS = '.\\[{()*+?^$|'
# Characters that introduce a repetition operator.
REPEAT_CHARS = '*+?{'
# Character sets kept as tuples of one-character strings; the parser tests
# tokens (such as the tokenizer's lookahead) for membership in them.
DIGITS = tuple('0123456789')
OCTDIGITS = tuple('01234567')
HEXDIGITS = tuple('0123456789abcdefABCDEF')
WHITESPACE = tuple(' \t\n\r\x0b\x0c')
# Escape tokens that stand for a single literal character code.
ESCAPES = {
'\\a': (LITERAL, 7),
'\\b': (LITERAL, 8),
'\\f': (LITERAL, 12),
'\\n': (LITERAL, 10),
'\\r': (LITERAL, 13),
'\\t': (LITERAL, 9),
'\\v': (LITERAL, 11),
'\\\\': (LITERAL, ord('\\')) }
# Escape tokens that stand for a position assertion (AT) or for a character
# category wrapped in a one-member set (IN).  Note that '\\b' appears both
# here and in ESCAPES; which meaning applies depends on which table the
# caller consults first.
CATEGORIES = {
'\\A': (AT, AT_BEGINNING),
'\\b': (AT, AT_BOUNDARY),
'\\B': (AT, AT_NON_BOUNDARY),
'\\d': (IN, [
(CATEGORY, CATEGORY_DIGIT)]),
'\\D': (IN, [
(CATEGORY, CATEGORY_NOT_DIGIT)]),
'\\s': (IN, [
(CATEGORY, CATEGORY_SPACE)]),
'\\S': (IN, [
(CATEGORY, CATEGORY_NOT_SPACE)]),
'\\w': (IN, [
(CATEGORY, CATEGORY_WORD)]),
'\\W': (IN, [
(CATEGORY, CATEGORY_NOT_WORD)]),
'\\Z': (AT, AT_END) }
# Inline flag letters, as used in "(?...)", mapped to the SRE_FLAG_* bits that
# the parser ORs into the pattern state's flags.
FLAGS = {
'i': SRE_FLAG_IGNORECASE,
'L': SRE_FLAG_LOCALE,
'm': SRE_FLAG_MULTILINE,
's': SRE_FLAG_DOTALL,
'x': SRE_FLAG_VERBOSE,
't': SRE_FLAG_TEMPLATE,
'u': SRE_FLAG_UNICODE }
class Pattern:
    """State shared by all subpatterns of one expression being parsed.

    Attributes:
      flags     -- flag bits, initially 0
      groups    -- id that the next allocated group will receive, initially 1
      groupdict -- maps group names to the ids allocated for them
    """

    def __init__(self):
        self.groupdict = {}
        self.groups = 1
        self.flags = 0

    def getgroup(self, name=None):
        """Allocate the next group id and return it.

        When `name` is truthy the id is also recorded under that name in
        `groupdict` (replacing any earlier entry with the same name).
        """
        self.groups = self.groups + 1
        allocated = self.groups - 1
        if name:
            self.groupdict[name] = allocated
        return allocated
class SubPattern:
    """A list of (opcode, argument) items that belongs to one pattern state.

    Attributes:
      pattern -- the shared state object this subpattern was created for
      data    -- the list of (opcode, argument) items
      width   -- cached result of getwidth(), or None before the first call
    """

    def __init__(self, pattern, data=None):
        self.pattern = pattern
        # A falsy `data` (None or an empty sequence) is replaced by a new list.
        if not data:
            data = []
        self.data = data
        self.width = None

    def dump(self, level=0):
        """Write an indented listing of the items to sys.stdout.

        Each item starts a line with its opcode, indented by `level` spaces.
        Members of an 'in' item are listed one per line at `level + 1`;
        the alternatives of a 'branch' item are dumped at `level + 1` and
        separated by an 'or' line.  For any other item whose argument is a
        tuple or list, plain members are written on the opcode line separated
        by spaces and SubPattern members are dumped recursively at
        `level + 1`.  Any other argument is written after the opcode.
        """
        write = sys.stdout.write
        # True while nothing has been written on the current output line.
        at_line_start = 1
        for op, av in self.data:
            write(level * ' ' + op)
            at_line_start = 0
            if op == 'in':
                write('\n')
                at_line_start = 1
                for member_op, member_arg in av:
                    write('%s %s\n' % ((level + 1) * ' ' + member_op,
                                       member_arg))
            elif op == 'branch':
                write('\n')
                at_line_start = 1
                count = 0
                for alternative in av[1]:
                    if count > 0:
                        write(level * ' ' + 'or' + '\n')
                    alternative.dump(level + 1)
                    count = count + 1
            elif type(av) in (type(()), type([])):
                for member in av:
                    if isinstance(member, SubPattern):
                        # Finish the opcode line before the nested listing.
                        if not at_line_start:
                            write('\n')
                        member.dump(level + 1)
                        at_line_start = 1
                    else:
                        if not at_line_start:
                            write(' ')
                        write(str(member))
                        at_line_start = 0
            else:
                write(' ' + str(av))
                at_line_start = 0
            if not at_line_start:
                write('\n')
                at_line_start = 1

    def __repr__(self):
        return repr(self.data)

    def __len__(self):
        return len(self.data)

    def __delitem__(self, index):
        del self.data[index]

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, code):
        self.data[index] = code

    def __getslice__(self, start, stop):
        # Slices are wrapped in a new SubPattern that shares the same state.
        return SubPattern(self.pattern, self.data[start:stop])

    def insert(self, index, code):
        self.data.insert(index, code)

    def append(self, code):
        self.data.append(code)

    def getwidth(self):
        """Return (lo, hi), the minimum and maximum width of this subpattern.

        Widths are summed over the items up to (not including) a SUCCESS
        item; opcodes not listed below contribute nothing.  Both bounds are
        clamped to sys.maxint, and the result is cached in `self.width`.
        """
        if self.width:
            return self.width
        # Accumulate in long integers; the totals are clamped to sys.maxint
        # only at the end (presumably so nested repeats cannot overflow).
        lo = hi = long(0)
        for op, av in self.data:
            if op is BRANCH:
                # Narrowest and widest alternative.
                i = sys.maxint
                j = 0
                for alternative in av[1]:
                    (low, high) = alternative.getwidth()
                    i = min(i, low)
                    j = max(j, high)
                lo = lo + i
                hi = hi + j
            elif op is CALL:
                (i, j) = av.getwidth()
                lo = lo + i
                hi = hi + j
            elif op is SUBPATTERN:
                (i, j) = av[1].getwidth()
                lo = lo + i
                hi = hi + j
            elif op in (MIN_REPEAT, MAX_REPEAT):
                # av is (min count, max count, repeated subpattern).
                (i, j) = av[2].getwidth()
                lo = lo + long(i) * av[0]
                hi = hi + long(j) * av[1]
            elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
                lo = lo + 1
                hi = hi + 1
            elif op == SUCCESS:
                break
        self.width = (int(min(lo, sys.maxint)), int(min(hi, sys.maxint)))
        return self.width
class Tokenizer:
    """Splits a string into tokens and keeps one token of lookahead.

    A token is a single character, or a backslash together with the
    character that follows it.  `next` always holds the upcoming token and
    is None once the input is exhausted; `index` is the position just past
    that token.
    """

    def __init__(self, string):
        self.string = string
        self.index = 0
        self.__next()

    def __next(self):
        # Load the token starting at `index` into `next` and step past it.
        # Raises error('bogus escape') when a backslash ends the input.
        position = self.index
        if position < len(self.string):
            token = self.string[position]
            if token[0] == '\\':
                try:
                    following = self.string[position + 1]
                except IndexError:
                    raise error('bogus escape')
                token = token + following
            self.index = position + len(token)
            self.next = token
        else:
            self.next = None

    def match(self, char, skip=1):
        """Return 1 if the upcoming token equals `char`, otherwise 0.

        On a match the token is consumed unless `skip` is false.
        """
        if char != self.next:
            return 0
        if skip:
            self.__next()
        return 1

    def get(self):
        """Consume the upcoming token and return it (None at end of input)."""
        token = self.next
        self.__next()
        return token

    def tell(self):
        """Return the current position as an (index, next) pair."""
        return (self.index, self.next)

    def seek(self, index):
        """Restore a position previously returned by tell()."""
        (self.index, self.next) = index
def isident(char):
    """Return a true value if `char` is an ASCII letter or an underscore."""
    return 'a' <= char <= 'z' or 'A' <= char <= 'Z' or char == '_'


def isdigit(char):
    """Return a true value if `char` is one of the characters '0'..'9'."""
    return '0' <= char <= '9'


def isname(name):
    """Return 1 if `name` is a valid group name, otherwise 0.

    A valid name is non-empty, starts with a character accepted by
    isident(), and consists only of characters accepted by isident() or
    isdigit().
    """
    # An empty name is rejected up front instead of failing on name[0].
    if not name or not isident(name[0]):
        return 0
    for char in name:
        if not isident(char) and not isdigit(char):
            return 0
    return 1
def _group(escape, groups):
try:
gid = int(escape[1:])
if gid and gid < groups:
return gid
except ValueError:
pass
return None
def _class_escape(source, escape):
    """Translate a backslash token found inside a character class.

    `source` is the tokenizer (further digits are read from it) and
    `escape` is the backslash token.  Returns a code pair:

      * the ESCAPES entry, if there is one (so '\\b' is a backspace here);
      * the CATEGORIES entry, but only when it is a character category
        (an IN code) -- position assertions cannot be set members, so
        their letters fall through and are taken literally;
      * (LITERAL, n) for '\\xHH' (exactly two hex digits) and for an octal
        escape of up to three digits, with n masked to eight bits;
      * (LITERAL, ord(c)) for any other two-character escape.

    Raises error('bogus escape: ...') for anything else.
    """
    code = ESCAPES.get(escape)
    if code:
        return code
    code = CATEGORIES.get(escape)
    if code and code[0] == IN:
        return code
    try:
        if escape[1:2] == 'x':
            while source.next in HEXDIGITS and len(escape) < 4:
                escape = escape + source.get()
            if len(escape) != 4:
                # Report the escape as written, including the 'x'.
                raise error('bogus escape: %s' % repr(escape))
            return (LITERAL, int(escape[2:], 16) & 255)
        elif str(escape[1:2]) in OCTDIGITS:
            # At most three octal digits in total, the same limit that
            # _escape() applies to '\\0' escapes.
            while source.next in OCTDIGITS and len(escape) < 4:
                escape = escape + source.get()
            return (LITERAL, int(escape[1:], 8) & 255)
        if len(escape) == 2:
            return (LITERAL, ord(escape[1]))
    except ValueError:
        pass
    raise error('bogus escape: %s' % repr(escape))
def _escape(source, escape, state):
    """Translate a backslash token found outside a character class.

    `source` is the tokenizer (further digits are read from it), `escape`
    the backslash token and `state` the pattern state whose `groups` count
    bounds valid group references.  CATEGORIES is consulted before ESCAPES.
    Otherwise the result is (LITERAL, n) for a two-digit hex escape, an
    octal escape or any other two-character escape, or (GROUPREF, gid) for
    a reference to an existing group.

    Raises error('bogus escape: ...') when the token fits none of these.
    """
    for table in (CATEGORIES, ESCAPES):
        code = table.get(escape)
        if code:
            return code
    try:
        kind = escape[1:2]
        if kind == 'x':
            # Hexadecimal: exactly two digits are required.
            while source.next in HEXDIGITS and len(escape) < 4:
                escape = escape + source.get()
            if len(escape) != 4:
                raise ValueError
            return (LITERAL, int(escape[2:], 16) & 255)
        elif kind == '0':
            # Octal starting with 0: up to three digits in total.
            while source.next in OCTDIGITS and len(escape) < 4:
                escape = escape + source.get()
            return (LITERAL, int(escape[1:], 8) & 255)
        elif kind in DIGITS:
            # Either a three-digit octal escape or a decimal group reference.
            if source.next in DIGITS:
                escape = escape + source.get()
                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS
                        and source.next in OCTDIGITS):
                    escape = escape + source.get()
                    return (LITERAL, int(escape[1:], 8) & 255)
            group = _group(escape, state.groups)
            if not group:
                raise ValueError
            return (GROUPREF, group)
        if len(escape) == 2:
            return (LITERAL, ord(escape[1]))
    except ValueError:
        pass
    raise error('bogus escape: %s' % repr(escape))
def _parse_sub(source, state, nested=1):
    """Parse an alternation ("a|b|c") and return it as a SubPattern.

    `source` is the tokenizer and `state` the shared pattern state.  With
    `nested` true the alternation must be followed by ')' (which is left
    unconsumed) or by the end of the input, otherwise
    error('pattern not properly closed') is raised; with `nested` false
    parsing simply stops after the last alternative.

    A single alternative is returned as is.  For several alternatives,
    items that every alternative starts with are moved in front of the
    branch, and if each remaining alternative is exactly one LITERAL the
    branch is stored as an (IN, [...]) set instead of a BRANCH.
    """
    items = []
    while 1:
        items.append(_parse(source, state))
        if source.match('|'):
            continue
        if not nested:
            break
        if not (source.next) or source.match(')', 0):
            break
        else:
            raise error('pattern not properly closed')
    if len(items) == 1:
        return items[0]
    subpattern = SubPattern(state)
    # Factor out leading items shared by all alternatives, one at a time.
    while 1:
        prefix = None
        for item in items:
            if not item:
                # An empty alternative has nothing left to share.
                break
            if prefix is None:
                prefix = item[0]
            elif item[0] != prefix:
                break
        else:
            for item in items:
                del item[0]
            subpattern.append(prefix)
            continue
        break
    # If every alternative is now a single literal, use a character set.
    for item in items:
        if len(item) != 1 or item[0][0] != LITERAL:
            break
    else:
        members = []
        for item in items:
            members.append(item[0])
        subpattern.append((IN, members))
        return subpattern
    subpattern.append((BRANCH, (None, items)))
    return subpattern
def _parse(source, state):
    """Parse one alternative and return it as a SubPattern.

    Tokens are read from `source` until a '|' or ')' is the upcoming token
    (it is left unconsumed) or the input ends.  `state` is the shared
    pattern state: its flags decide whether verbose-mode whitespace and
    '#' comments are skipped, inline flags are ORed into it, and group ids
    are allocated from it.

    Raises error for malformed input (unterminated character class or
    group name, illegal range, bad repeat interval, nothing to repeat,
    unbalanced parenthesis, unknown extension, ...).
    """
    subpattern = SubPattern(state)
    while 1:
        if source.next in ('|', ')'):
            break
        this = source.get()
        if this is None:
            break
        if state.flags & SRE_FLAG_VERBOSE:
            # Verbose mode: drop whitespace and '#' comments up to newline.
            if this in WHITESPACE:
                continue
            if this == '#':
                while 1:
                    this = source.get()
                    if this in (None, '\n'):
                        break
                continue
        if this and this[0] not in SPECIAL_CHARS:
            subpattern.append((LITERAL, ord(this)))
        elif this == '[':
            # Character class.
            charset = []
            if source.match('^'):
                charset.append((NEGATE, None))
            # A ']' closes the class only once something has been added
            # after the optional negation marker.
            start = charset[:]
            while 1:
                this = source.get()
                if this == ']' and charset != start:
                    break
                elif this and this[0] == '\\':
                    code1 = _class_escape(source, this)
                elif this:
                    code1 = (LITERAL, ord(this))
                else:
                    raise error('unexpected end of regular expression')
                if source.match('-'):
                    # Possible range "code1-code2".
                    this = source.get()
                    if this == ']':
                        # Trailing '-' is a literal dash; the class ends.
                        if code1[0] is IN:
                            code1 = code1[1][0]
                        charset.append(code1)
                        charset.append((LITERAL, ord('-')))
                        break
                    elif this is None:
                        # Input ended right after the '-': report it like
                        # any other unterminated class.
                        raise error('unexpected end of regular expression')
                    elif this[0] == '\\':
                        code2 = _class_escape(source, this)
                    else:
                        code2 = (LITERAL, ord(this))
                    if code1[0] != LITERAL or code2[0] != LITERAL:
                        raise error('illegal range')
                    lo = code1[1]
                    hi = code2[1]
                    if hi < lo:
                        raise error('illegal range')
                    charset.append((RANGE, (lo, hi)))
                else:
                    # Categories arrive wrapped in a one-member IN set.
                    if code1[0] is IN:
                        code1 = code1[1][0]
                    charset.append(code1)
            # Simplify one-member classes to LITERAL / NOT_LITERAL.
            if len(charset) == 1 and charset[0][0] is LITERAL:
                subpattern.append(charset[0])
            elif (len(charset) == 2 and charset[0][0] is NEGATE
                    and charset[1][0] is LITERAL):
                subpattern.append((NOT_LITERAL, charset[1][1]))
            else:
                subpattern.append((IN, charset))
        elif this and this[0] in REPEAT_CHARS:
            # Repetition operator applied to the preceding item.
            if this == '?':
                (min_count, max_count) = (0, 1)
            elif this == '*':
                (min_count, max_count) = (0, MAXREPEAT)
            elif this == '+':
                (min_count, max_count) = (1, MAXREPEAT)
            elif this == '{':
                here = source.tell()
                (min_count, max_count) = (0, MAXREPEAT)
                lo_text = hi_text = ''
                while source.next in DIGITS:
                    lo_text = lo_text + source.get()
                if source.match(','):
                    while source.next in DIGITS:
                        hi_text = hi_text + source.get()
                else:
                    hi_text = lo_text
                if not source.match('}'):
                    # Not a repeat after all: treat '{' as a literal and
                    # re-read what followed it.
                    subpattern.append((LITERAL, ord(this)))
                    source.seek(here)
                    continue
                if lo_text:
                    min_count = int(lo_text)
                if hi_text:
                    max_count = int(hi_text)
                if max_count < min_count:
                    raise error('bad repeat interval')
            else:
                raise error('not supported')
            if subpattern:
                item = subpattern[-1:]
            else:
                raise error('nothing to repeat')
            # A trailing '?' selects the minimizing variant.
            if source.match('?'):
                subpattern[-1] = (MIN_REPEAT, (min_count, max_count, item))
            else:
                subpattern[-1] = (MAX_REPEAT, (min_count, max_count, item))
        elif this == '.':
            subpattern.append((ANY, None))
        elif this == '(':
            # `group`: 1 = numbered group, 2 = "(?:" group without a
            # number, 0 = no group at all (inline flags).
            group = 1
            name = None
            if source.match('?'):
                group = 0
                if source.match('P'):
                    if source.match('<'):
                        # "(?P<name>...)": named group.
                        name = ''
                        while 1:
                            char = source.get()
                            if char is None:
                                raise error('unterminated name')
                            if char == '>':
                                break
                            name = name + char
                        group = 1
                        if not isname(name):
                            raise error('illegal character in group name')
                    elif source.match('='):
                        # "(?P=name)": reference to a named group.
                        name = ''
                        while 1:
                            char = source.get()
                            if char is None:
                                raise error('unterminated name')
                            if char == ')':
                                break
                            name = name + char
                        if not isname(name):
                            raise error('illegal character in group name')
                        gid = state.groupdict.get(name)
                        if gid is None:
                            raise error('unknown group name')
                        subpattern.append((GROUPREF, gid))
                        continue
                    else:
                        char = source.get()
                        if char is None:
                            raise error('unexpected end of pattern')
                        raise error('unknown specifier: ?P%s' % char)
                elif source.match(':'):
                    group = 2
                elif source.match('#'):
                    # "(?#...)": comment, skipped up to the closing ')'.
                    while 1:
                        if source.next is None or source.next == ')':
                            break
                        source.get()
                    if not source.match(')'):
                        raise error('unbalanced parenthesis')
                    continue
                elif source.next in ('=', '!', '<'):
                    # Assertion; a leading '<' reverses the direction.
                    char = source.get()
                    direction = 1
                    if char == '<':
                        if source.next not in ('=', '!'):
                            raise error('syntax error')
                        direction = -1
                        char = source.get()
                    p = _parse_sub(source, state)
                    if not source.match(')'):
                        raise error('unbalanced parenthesis')
                    if char == '=':
                        subpattern.append((ASSERT, (direction, p)))
                    else:
                        subpattern.append((ASSERT_NOT, (direction, p)))
                    continue
                else:
                    # Inline flags such as "(?i)".
                    while FLAGS.has_key(source.next):
                        state.flags = state.flags | FLAGS[source.get()]
            if group:
                if group == 2:
                    group = None
                else:
                    group = state.getgroup(name)
                p = _parse_sub(source, state)
                if not source.match(')'):
                    raise error('unbalanced parenthesis')
                subpattern.append((SUBPATTERN, (group, p)))
            else:
                # After inline flags only the closing ')' may follow.
                while 1:
                    char = source.get()
                    if char is None or char == ')':
                        break
                    raise error('unknown extension')
        elif this == '^':
            subpattern.append((AT, AT_BEGINNING))
        elif this == '$':
            subpattern.append((AT, AT_END))
        elif this and this[0] == '\\':
            code = _escape(source, this, state)
            subpattern.append(code)
        else:
            raise error('parser error')
    return subpattern
def parse(str, flags=0, pattern=None):
    """Parse the pattern string `str` and return the resulting SubPattern.

    `flags` is stored on the pattern state before parsing; `pattern` is
    the state object to use, and a new Pattern is created when it is None.

    Raises error('unbalanced parenthesis') if a ')' is left over and
    error('bogus characters at end of regular expression') for any other
    leftover token.  If the verbose flag was not passed in but was turned
    on inside the pattern, the string is parsed again with the flags
    collected during the first pass.
    """
    source = Tokenizer(str)
    if pattern is None:
        pattern = Pattern()
    pattern.flags = flags
    parsed = _parse_sub(source, pattern, 0)
    leftover = source.get()
    if leftover == ')':
        raise error('unbalanced parenthesis')
    if leftover:
        raise error('bogus characters at end of regular expression')
    if flags & SRE_FLAG_VERBOSE:
        return parsed
    if parsed.pattern.flags & SRE_FLAG_VERBOSE:
        return parse(str, parsed.pattern.flags)
    return parsed
def parse_template(source, pattern):
    """Parse the replacement string `source` into a list of code pairs.

    The result contains (LITERAL, code) pairs for text and (MARK, index)
    pairs for group references.  `pattern` must provide `groups` (used to
    decide whether a number is a valid group reference) and `groupindex`
    (a mapping from group names to indexes).

    Recognized escapes:
      \\g<name>, \\g<number>  group reference by name or number
      \\N...                  group reference if the number denotes an
                              existing group (the longest such number is
                              used), otherwise an octal character code
      entries of ESCAPES      the literal they stand for
    Any other escape is kept as its individual characters.

    Raises error for a malformed group reference and IndexError for an
    unknown group name.
    """
    s = Tokenizer(source)
    p = []
    a = p.append
    while 1:
        this = s.get()
        if this is None:
            break
        if this and this[0] == '\\':
            if this == '\\g':
                name = ''
                if s.match('<'):
                    while 1:
                        char = s.get()
                        if char is None:
                            raise error('unterminated group name')
                        if char == '>':
                            break
                        name = name + char
                if not name:
                    raise error('bad group name')
                try:
                    index = int(name)
                except ValueError:
                    if not isname(name):
                        raise error('illegal character in group name')
                    try:
                        index = pattern.groupindex[name]
                    except KeyError:
                        raise IndexError('unknown group name')
                a((MARK, index))
            elif len(this) > 1 and this[1] in DIGITS:
                code = None
                limit = pattern.groups + 1
                while 1:
                    group = _group(this, limit)
                    if group:
                        if (s.next not in DIGITS
                                or not _group(this + s.next, limit)):
                            code = (MARK, int(group))
                            break
                        # The longer number is a valid group too: take the
                        # digit and try to extend further.  Without this
                        # step the loop would never make progress.
                        this = this + s.get()
                    elif s.next in OCTDIGITS:
                        this = this + s.get()
                    else:
                        break
                if not code:
                    # Not a group reference: read it as an octal code.
                    this = this[1:]
                    code = (LITERAL, int(this[-6:], 8) & 255)
                a(code)
            else:
                try:
                    a(ESCAPES[this])
                except KeyError:
                    for c in this:
                        a((LITERAL, ord(c)))
        else:
            a((LITERAL, ord(this)))
    return p
def expand_template(template, match):
    """Build the replacement text for `match` from a parsed template.

    `template` is a sequence of (code, argument) pairs: LITERAL arguments
    are character codes and MARK arguments are group references passed to
    match.group().  The pieces are joined with an empty slice of
    match.string, and character codes are converted with chr or unichr
    depending on that slice's type.

    Raises error('empty group') if a referenced group is None.
    """
    empty = match.string[:0]
    if type(empty) is type(''):
        to_char = chr
    else:
        to_char = unichr
    pieces = []
    for code, arg in template:
        if code is LITERAL:
            pieces.append(to_char(arg))
        elif code is MARK:
            text = match.group(arg)
            if text is None:
                raise error('empty group')
            pieces.append(text)
    return string.join(pieces, empty)